/* Relative to prior "newcollapse.do", this just adds academic credits in first year as outcome 

NOTE: MUST RUN analysis_restatrev_additional_041509.do PRIOR TO RUNNING THIS PROGRAM!!!!


Relative to newcollapse041509.do, it adds N's to the collapse for earnings year7
*/


* Run the operating system's date command (shell is the long form of !)
shell date

* Start from a clean session: close any open log, clear data, drop user programs
capture log close
clear
program drop _all
set linesize 155

* d1 is the project root; every path used below is built from it.
* NOTE(review): col_remediation is not defined in this file -- presumably it is
* set in profile.do or by the calling environment; confirm before running.
*cap do ${fmartorell_home}top_program
global d1 "${col_remediation}program/program_paco/publication/"
do ${d1}do/top_program.do

* basepath only appears in the commented-out legacy paths of this file
global basepath ""   /* <- SET DIRECTORY HERE. */

*log using ${basepath}/newcollapse041509.log, replace
log using ${d1}log/newcollapse021017pm.log, replace

* Memory and variable limits (set mem is ignored by Stata 12 and later)
*set mem 1000m
set mem 2g
set maxvar 30000

* Load the analysis file.  Per the header note it must already have been built
* by analysis_restatrev_additional_041509.do.
*use ${fmartorell_home}/tmp/tmp3, clear 
use ${d1}tmp/tmp3, clear 

* Remove a merge indicator left over from earlier processing, if there is one
capture drop _m

* Bring in earnings for years 5-7, matched on altpid (old-style merge syntax).
* Only the three earnings variables are read from the using file; the match
* indicator is tabulated for the log and then discarded.
sort altpid 
*merge altpid using ${fmartorell_home}remediation/data/taspearn_allyears, unique sort keep(earn_yr5 earn_yr6 earn_yr7) _merge(newmerge)
merge altpid using ${d1}data/taspearn_allyears, unique sort keep(earn_yr5 earn_yr6 earn_yr7) _merge(newmerge)
tabulate newmerge
drop newmerge


* For each earnings year build
*   posearnY    : 1 if earnings are positive, 0 if not, missing when earnings are missing
*   copearn_yrY : earnings for observations with positive earnings, missing otherwise
* The indicator must be missing when earnings are missing because tmp3.dta has
* observations from the latest cohorts that are not part of the earnings sample.
forvalues yr=5/7 {
 gen byte posearn`yr'=earn_yr`yr'>0 if earn_yr`yr'<.
 gen copearn_yr`yr'=earn_yr`yr' if posearn`yr'==1
}

* Year 7: blank out all three variables when tspyr>198, or tspyr==198 with firstsem>1
* (presumably entrants too recent for year-7 earnings to be observed -- confirm)
local noyr7 "(tspyr>198 | (tspyr==198 & firstsem>1))"
foreach v in posearn7 earn_yr7 copearn_yr7 {
 replace `v'=. if `noyr7'
}

* Report memory usage
memory 

*Covariates for all regressions
global covs="distmiss distl25nm distm50nm indistnm white hisp startfall nontradage d192-d199 econ_dis misecon dacyr1992-dacyr2000 delay2"

* For every test (w, m, r, any) build predicted outcomes from the covariates plus
* the score on another test.  Models are fit separately for sr==0 and sr==1 on
* observations with <test>sample==1 and a score x of at least -100, and the
* predictions are stored only for the estimation sample as
*   <outcome>hat_sr0_<test>   and   <outcome>hat_sr1_<test>
foreach test in w m r any {

 * Working variables are rebuilt for each test.  They are dropped one at a time
 * because a single drop listing several names does nothing at all if any one of
 * them is absent, and under capture that failure would go unnoticed.
 foreach tmpv in x x2 x3 inter inter2 inter3 pass {
  cap drop `tmpv'
 }
 gen x=rsc_`test'
 gen x2=x*x
 gen x3=x*x*x
 gen pass=x>=0
 gen inter=pass*x
 gen inter2=pass*x2
 gen inter3=pass*x3

 * opp is the score used as an extra regressor: rsc_r for m, rsc_m for r and w,
 * rsc_h for any (presumably an abbreviation of rsc_hgh -- confirm)
 if "`test'"=="m" { 
   local opp="rsc_r"
 }
 else if "`test'"=="r" | "`test'"=="w" {
   local opp="rsc_m"
 }
 else if "`test'"=="any" {
   local opp="rsc_h"
 }

 * Outcomes modelled with a probit; predict after probit returns the fitted probability
 foreach var of varlist srjrgrwin6 srgrwin6 anydeved_2sem de*_2sem maxnewhg1 maxnewhg2 maxnewhg3 maxnewhg4 posearn5 posearn6 posearn7  {
  * Progress marker.  The original also printed a local named sr that is never
  * defined in this loop and therefore always expanded to nothing.
  disp "`var' `test'"
  forvalues s=0/1 {
   qui probit `var' `opp' $covs if `test'sample==1 & x>=-100 & sr==`s'
   qui predict `var'hat_sr`s'_`test' if e(sample)==1
  }
 }

 * Outcomes modelled by OLS; predict after regress returns the linear prediction
 foreach var of varlist sumcredit_ac earn_yr5 earn_yr6 earn_yr7  {
  forvalues s=0/1 {
   qui reg `var' `opp' $covs if `test'sample==1 & x>=-100 & sr==`s'
   qui predict `var'hat_sr`s'_`test' if e(sample)==1
  }
 } 
}



/* -----------------------------------------------------------------------------------------
Globals holding lists of variables.
val and the srXlistY globals are expanded in the collapse commands later in this file.
The *_sub globals and sublist are not referenced in the part of the file visible here
(presumably they are used by other programs -- confirm).
------------------------------------------------------------------------------------------- */

*variables to test validity
global val "startfall white nontradage rsc_math rsc_read rsc_hgh econ_dis misecon early delay1 delay2 distmiss distl25nm distm50nm indistnm"

*Variables to send through RD for sr colleges
global sr1list1 "srsch_yr0 sumcredit_ac"
global sr1list2 "maxnewhg1 maxnewhg2 maxnewhg3 maxnewhg4 trandwn srgrwin4 srgrwin5 srgrwin6"

*Variables to send through RD for jr colleges
global sr0list1 "srsch_yr0 jrsch_yr0 sumcredit_ac"
global sr0list2 "maxnewhg1 maxnewhg2 maxnewhg3 maxnewhg4 tranup srjrgrwin4 srjrgrwin5 srjrgrwin6"

*Subgroup versions: variables to send through RD for sr colleges
global sr1list1_sub "srsch_yr0  sumcredit_ac"
global sr1list2_sub "maxnewhg1 maxnewhg2 maxnewhg3 maxnewhg4 trandwn srgrwin4 srgrwin5 srgrwin6"

*Subgroup versions: variables to send through RD for jr colleges
global sr0list1_sub "srsch_yr0 jrsch_yr0 sumcredit_ac"
global sr0list2_sub "maxnewhg1 maxnewhg2 maxnewhg3 maxnewhg4 tranup srjrgrwin4 srjrgrwin5 srjrgrwin6"


*global sr0list="anydeved_2sem"

/*
Reduced lists kept for quick trial runs (disabled):

global val="white" 

global sr1list1="anydeved_2sem"
global sr1list2="trandwn"

*Variables to send through RD for jr colleges
global sr0list1="anydeved_2sem"
global sr0list2="tranup"


global sr1list1_sub="anydeved_2sem"
global sr1list2_sub="trandwn"

*Variables to send through RD for jr colleges
global sr0list1_sub="anydeved_2sem"
global sr0list2_sub="tranup"
*/

*List of subgroups
global sublist "male female l25 m50 nw ec old pwrt highremed lowremed etest ltest"
*global sublist="male"

* Describe the subgroup variables; this stops the run if any of them is missing
describe $sublist

/* ---------------------------------------------------------------------------------
Variables for the specification with 2 endogenous regressors.
For each of the m and r scores: the score itself, its square and cube, a pass
indicator (score >= 0), and the three powers interacted with the pass indicator.
----------------------------------------------------------------------------------- */

foreach subj in m r {
 gen x_`subj'=rsc_`subj'
 gen pass_`subj'=x_`subj'>=0
 forvalues p=2/3 {
  gen x`p'_`subj'=(x_`subj')^`p'
 }
 gen inter_`subj'=x_`subj'*pass_`subj'
 forvalues p=2/3 {
  gen inter`p'_`subj'=x`p'_`subj'*pass_`subj'
 }
 * Restore the column order x, x2, x3, pass, inter, inter2, inter3 for this score
 order pass_`subj', after(x3_`subj')
}

describe x_r

* One id per distinct combination of the r and m scores
egen x_r_m=group(x_r x_m)

* Snapshot that every collapse below reloads
save ${d1}results/feb10.dta, replace 

* For every test sample (m, r, w, any) and every outcome list (sr0 or sr1 globals)
* write five kinds of result files under ${d1}results:
*   clps_main<sr>_<sample>        cell means by score x and sr, masked where a cell has under 5 obs
*   means_main<sr>_<sample>       means by sr for scores in [-100,80]
*   meansbypass_main<sr>_<sample> the same means by sr and pass
*   sd_main<sr>_<sample>          standard deviations by sr for scores in [-100,80]
*   sdbypass_main<sr>_<sample>    the same standard deviations by sr and pass
* The sr loop value only selects which global variable lists are used; the data
* are not restricted to that value of sr, and sr is a by-variable in every collapse.
* feb10.dta is reloaded before each collapse (the original preserve and restore
* pairs were already commented out in favour of reloading).
foreach tst in m r w any {
 foreach s in 0 1 {

  * Pieces shared by the collapse commands below
  local outcomes "${sr`s'list1} ${sr`s'list2}"
  local earnvars "earn_yr5 earn_yr6 earn_yr7 copearn_yr5 copearn_yr6 copearn_yr7 posearn5 posearn6 posearn7"
  local cellcounts "(count) n=anydeved_2sem nfsem=sem1sch_fsem   nearn_yr5=earn_yr5 nearn_yr6=earn_yr6 nearn_yr7=earn_yr7 nposearn5=posearn5 nposearn6=posearn6 nposearn7=posearn7"
  local cellvars "`outcomes' de*_*sem anydeved anydeved_2sem ${val} *hat_sr*_* `earnvars'"
  local sumvars "`outcomes' ${val} *hat_sr*_* `earnvars'"
  local insample "`tst'sample==1 & win==1"
  local inband "`insample' & inrange(rsc_`tst',-100,80)"

  * Step 1: cell means by score and sr, plus a few named cell counts
  use ${d1}results/feb10.dta, clear
  cap drop x pass
  gen x=rsc_`tst'
  gen pass=rsc_`tst'>=0
  collapse `cellvars' `cellcounts' if `insample', by(x sr)
  gen sample="`tst'"
  *saveold ${basepath}/remediation/results/clps_main`s'_`tst', replace
  saveold ${d1}results/clps_main`s'_`tst', replace

  * Step 2: number of non-missing obs per cell for EVERY variable, used to set
  * cell means to missing when the cell size is under 5
  use ${d1}results/feb10.dta, clear
  cap drop x pass
  gen x=rsc_`tst'
  gen pass=rsc_`tst'>=0
  collapse (count) `cellvars' if `insample', by(x sr)
  * Suffix every count with n so it cannot be confused with the cell mean,
  * then give the two by-variables their names back
  foreach v of varlist _all {
   rename `v' `v'n
  }
  rename srn sr
  rename xn x
  gen sample="`tst'"
  * Attach the cell means from step 1; every cell must be present on both sides
  merge x sr using ${d1}results/clps_main`s'_`tst', unique sort
  assert _m==3
  drop _m
  * Blank each cell mean, and its count, when the count is under 5 or missing.
  * The hat variables are listed by test suffix so that the n-suffixed count
  * copies are not picked up by the wildcard.
  foreach v of varlist `outcomes' de*_*sem anydeved anydeved_2sem ${val} *hat_sr*_m *hat_sr*_r *hat_sr*_w *hat_sr*_any  `earnvars' {
   qui replace `v'=. if `v'n<5 | `v'n==.
   qui replace `v'n=. if `v'n<5 | `v'n==.
  }
  * Blank the named cell counts themselves when under 5
  foreach v of varlist n nfsem nearn_yr5 nearn_yr6 nearn_yr7 nposearn5 nposearn6 nposearn7 {
   qui replace `v'=. if `v'<5 | `v'==.
  }
  saveold ${d1}results/clps_main`s'_`tst', replace

  * Step 3: means and standard deviations for scores in [-100,80], overall by sr
  * and then split by pass.  Files are written in the order
  * means, meansbypass, sd, sdbypass.
  foreach stat in mean sd {
   if "`stat'"=="mean" {
    * only the means files carry the named cell counts
    local stem "means"
    local extra "`cellcounts'"
   }
   else {
    local stem "sd"
    local extra ""
   }
   forvalues split=0/1 {
    use ${d1}results/feb10.dta, clear
    local byvars "sr"
    local fname "`stem'"
    if `split'==1 {
     cap drop pass
     gen pass=rsc_`tst'>=0
     local byvars "sr pass"
     local fname "`stem'bypass"
    }
    collapse (`stat') `sumvars' `extra' if `inband', by(`byvars')
    gen sample="`tst'"
    gen stat="`stat'"
    *legacy location: ${basepath}/remediation/results/`fname'_main`s'_`tst'
    saveold ${d1}results/`fname'_main`s'_`tst', replace
   }
  }

 }
}



* Stack the mean and sd summary files into a single dataset.
* Row order: the four "any" files first, then for sr 0 and 1 the means, sd,
* meansbypass and sdbypass files of the m, r and w samples.
* NOTE(review): meansbypass and sdbypass for the "any" sample are written by the
* collapse loop but are not appended here -- confirm whether that is intended.
* Legacy location of these files: ${basepath}/remediation/results/
use ${d1}results/means_main0_any, clear
foreach piece in sd_main0_any means_main1_any sd_main1_any {
 append using ${d1}results/`piece'
}

foreach s in 0 1 {
 foreach kind in means sd meansbypass sdbypass {
  foreach tst in m r w {
   append using ${d1}results/`kind'_main`s'_`tst'
  }
 }
}
*saveold ${basepath}/remediation/results/means_sd_main_all.dta, replace
saveold ${d1}results/means_sd_main_all.dta, replace

* Stack the cell-mean files and save one combined file per sr value.
* NOTE(review): the data in memory are not reset between passes, so
* clps_main0_all holds both "any" files plus the sr 0 files for m, r and w, and
* clps_main1_all holds all of that plus the sr 1 files -- confirm that downstream
* programs expect this.
* Legacy location of these files: ${basepath}/remediation/results/
use ${d1}results/clps_main0_any, clear
append using ${d1}results/clps_main1_any

foreach s in 0 1 {
 foreach tst in m r w {
  append using ${d1}results/clps_main`s'_`tst'.dta
 }

 * Blank every variable stored from srsch_yr0 through posearn7 when the overall
 * cell count n is missing or under 5
 foreach v of varlist srsch_yr0 - posearn7 {
  replace `v'=. if n==. | n<5
 }

 * These two variables are blanked on the nfsem count instead
 foreach v of varlist jrsch_yr0 srsch_yr0  {
  replace `v'=. if nfsem==. | nfsem<5
 }

 * Earnings and positive-earnings means are blanked on their own counts
 forvalues y=5/7 {
  replace earn_yr`y'=. if nearn_yr`y'==. | nearn_yr`y'<5
  replace posearn`y'=. if nposearn`y'==. | nposearn`y'<5
 }

 *saveold ${basepath}/remediation/results/clps_main`s'_all, replace
 saveold ${d1}results/clps_main`s'_all, replace
}
